{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Excel File\n",
    "filename = 'data/car_financing.xlsx'\n",
    "df = pd.read_excel(filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Filtering \n",
    "car_filter = df['car_type']=='Toyota Sienna'\n",
    "interest_filter = df['interest_rate']==0.0702\n",
    "df = df.loc[car_filter & interest_filter, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Approach 1 dictionary substitution using rename method\n",
    "df = df.rename(columns={'Starting Balance': 'starting_balance',\n",
    "                        'Interest Paid': 'interest_paid', \n",
    "                        'Principal Paid': 'principal_paid',\n",
    "                        'New Balance': 'new_balance'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Approach 2 list replacement\n",
    "# Only changing Month -> month, but we need to list the rest of the columns\n",
    "df.columns = ['month',\n",
    "              'starting_balance',\n",
    "              'Repayment',\n",
    "              'interest_paid',\n",
    "              'principal_paid',\n",
    "              'new_balance',\n",
    "              'term',\n",
    "              'interest_rate',\n",
    "              'car_type']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Approach 1\n",
    "# This approach allows you to drop multiple columns at a time \n",
    "df = df.drop(columns=['term'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Approach 2 use the del command\n",
    "del df['Repayment']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Aggregate Methods\n",
    "It is often a good idea to compute summary statistics."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Aggregate Method | Description\n",
    "--- | --- \n",
    "sum | sum of values\n",
    "cumsum | cumulative sum\n",
    "mean | mean of values\n",
    "median | arithmetic median of values\n",
    "min | minimum\n",
    "max | maximum\n",
    "mode | mode\n",
    "std | unbiased standard deviation\n",
    "var | unbiased variance\n",
    "quantile | compute rank-based statistics of elements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sum the values in a column\n",
    "# total amount of interest paid over the course of the loan\n",
    "df['interest_paid'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sum all the values across all columns\n",
    "df.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'Toyota Sienna' + 'Toyota Sienna'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notice that by default it seems like the sum function ignores missing values. \n",
    "help(df['interest_paid'].sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The info method gives the column datatypes + number of non-null values\n",
    "# Notice that we seem to have 60 non-null values for all but the Interest Paid column. \n",
    "df.info()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
